* Close any log left open by a previous run, then start a fresh text log.
capture log close
log using "Y:\Documents\Output\descriptives", text replace

*******************************************************************
* Do-file that produces the descriptive statistics                *
* (graphs, correlation matrices and tables).                      *
* fo 02.07.2015                                                   *
*******************************************************************

version 13
set more off
clear

* All relative paths below ("Data\...", "Output\...") resolve against this folder.
cd "Y:\Documents\"
use "Data\data_fach.dta", clear

* Value labels for the major codes stored in B1ber1ab1 (one code per line).
#delimit ;
label define fachnames
	 2 "Theology"
	 5 "History/Philosophy"
	14 "Philology/Cult.Studies"
	15 "Psychology"
	16 "Pedagogy"
	22 "Sports"
	26 "Soc.Science/Pol.Science"
	27 "Social Work"
	28 "Law"
	30 "Economic Sciences"
	31 "Industrial Engineering"
	37 "Mathematics"
	38 "Computer Science"
	39 "Physics"
	40 "Chemistry"
	41 "Pharmaceutics"
	42 "Biology"
	43 "Geosciences"
	44 "Geography"
	49 "Medical Sciences"
	58 "Agronomy/Forestry"
	60 "Nutrition Sci./Home Economics"
	63 "Mechanical Engineering"
	64 "Electrical Engineering"
	66 "Architecture/Inter.Des./Design"
	68 "Civil Engineering"
	74 "Arts, Art Sciences"
	80 "State Teacher"
;
#delimit cr
label values B1ber1ab1 fachnames

***		Dot Graphs		***
***************************
* Every dot graph plots one or two variables by major (B1ber1ab1), sorted in
* descending order of the first plotted variable, and is stored both as .gph
* and as .emf under Output\.

* Options shared by all dot graphs in this section.
local bymajor over(B1ber1ab1, sort(1) des label(labsize(*0.7)))
local layout scheme(s1mono) ysize(6.0)
local solid marker(1, msymbol(circle) mcolor(gs0))

* proportion female
graph dot pfemale2, `bymajor' `layout' `solid' legend(label(1 "NEPS" ))
graph save Graph "Output\dotgraph_propfemale.gph", replace
graph export "Output\dotgraph_propfemale.emf", replace

* hours worked and work-life DISsatisfaction (100 minus worklife)
generate wldissat = 100 - worklife
graph dot hrswork wldissat, exclude0 `bymajor' `layout' ///
	legend(label(1 "hrs.worked(ft)") label(2 "dissatisfaction")) ///
	`solid' ///
	marker(2, msymbol(circle_hollow) mfcolor(white) mlcolor(gs0))
graph save Graph "Output\dotgraph_worklife.gph", replace
graph export "Output\dotgraph_worklife.emf", replace

* sdiscrim, with a reference line at zero
graph dot sdiscrim, `bymajor' `layout' yline(0) `solid'
graph save Graph "Output\dotgraph_discrim.gph", replace
graph export "Output\dotgraph_discrim.emf", replace

* mathint and intercpt: same layout, file name follows the variable name
foreach measure in mathint intercpt {
	graph dot `measure', `bymajor' `layout' `solid'
	graph save Graph "Output\dotgraph_`measure'.gph", replace
	graph export "Output\dotgraph_`measure'.emf", replace
}

***		Bar Graphs		***
***************************
* Two horizontal bar charts by major, one per variable family:
*   elt  -> apfelt  apmelt  apfmelt   (height 6.5)
*   peer -> apfpeer apmpeer apfmpeer  (height 7.0)
* Majors are sorted in descending order of the third variable, which the
* legend labels "Difference".
foreach grp in elt peer {
	local height 6.5
	if "`grp'" == "peer" local height 7.0

	graph hbar apf`grp' apm`grp' apfm`grp', ///
		over(B1ber1ab1, sort(3) des label(labsize(*0.7))) ///
		scheme(s1mono) ylabel(-20(20)100) ///
		bar(2, lcolor(black) fcolor(white)) bar(3, color(black)) ///
		legend(label(1 "For women") label(2 "For men") label(3 "Difference")) ///
		ysize(`height')
	graph save Graph "Output\bargraph_approv`grp'.gph", replace
	graph export "Output\bargraph_approv`grp'.emf", replace
}

***		correlation matrix		***
***********************************
* Pairwise correlations of the major-level indicators, printed to the log and
* exported (with row/column names) to Output\correlationmatrix.xlsx.
correlate pfemale2 pfemale intercpt mathint care apfmelt apfmpeer hrswork sdiscrim
matrix CORMAT = r(C)

* First listing still carries the variable names as row/column names.
matrix list CORMAT, format(%10.4f)

* Numbered names, applied identically to rows and columns.
local cmnames 1_pfemaleNEPS 2_pfemaleMZ 3_intercpt 4_mathint 5_care 6_apfmelt 7_apfmpeer 8_hrswork 9_sdiscrim
matrix colnames CORMAT = `cmnames'
matrix rownames CORMAT = `cmnames'

putexcel set "Output\correlationmatrix.xlsx", replace
putexcel B2=matrix(CORMAT, names) using "Output\correlationmatrix.xlsx", replace
	
***		descriptives table majors		***		
*******************************************
* Reloads the major-level data, keeps the reporting columns in a fixed order
* and writes them to Output\fachtable.dta and Output\fachtable.xlsx.

* Value-label definitions live with the dataset in memory, so reloading the
* data would drop the definition of fachnames. "label values" would then attach
* an undefined label and fachtable.dta would be saved without the major names.
* Carry the definition across the reload via a temporary do-file.
tempfile majorlabels
label save fachnames using "`majorlabels'"

use "Data\data_fach.dta", clear

run "`majorlabels'"
lab val B1ber1ab1 fachnames
	
* Column order of the exported table; "order" and "keep" use the same list.
order B1ber1ab1 pfemale2 pfemale2_se intercpt intercpt_se mathint mathint_se care ///
	apfelt apfelt_se apmelt apmelt_se apfmelt apfmelt_se apfpeer apfpeer_se apmpeer apmpeer_se apfmpeer apfmpeer_se ///
	hrswork hrswork_se sdiscrim sdiscrim_se fstats2 fneps2

keep  B1ber1ab1 pfemale2 pfemale2_se intercpt intercpt_se mathint mathint_se care ///
	apfelt apfelt_se apmelt apmelt_se apfmelt apfmelt_se apfpeer apfpeer_se apmpeer apmpeer_se apfmpeer apfmpeer_se ///
	hrswork hrswork_se sdiscrim sdiscrim_se fstats2 fneps2

save "Output\fachtable.dta", replace

* Copy the kept columns (first through last in the order set above) into a
* matrix and export it with names.
mkmat B1ber1ab1-fneps2, matrix(fachtable)
putexcel set "Output\fachtable.xlsx", replace
putexcel B2=matrix(fachtable, names) using "Output\fachtable.xlsx", replace
	

***		descriptive figures individual variables		***
***********************************************************
* Histograms of individual-level variables, one panel per value of frau
* (stacked in a single column), saved as .gph and .emf under Output\.
use "Data\data_choice_long.dta", clear
keep if choice==1

label variable frau "gender(0=male,1=female)"

* Options shared by all histograms, and those shared by the discrete ones.
local panels scheme(s1mono) by(frau, col(1)) xsize(5) ysize(5)
local discrete disc width(.02) fraction

* Interest scales: output file name is "bargraph_" + variable name.
foreach scale in int_doer int_thinker int_creator int_helper int_persuader int_organizer {
	histogram `scale', `panels' `discrete'
	graph save Graph "Output\bargraph_`scale'.gph", replace
	graph export "Output\bargraph_`scale'.emf", replace
}

* leist_md: unit-width bins starting at -15, axis labelled -15 to 15 in steps of 5.
histogram leist_md, `panels' xlabel(-15(5)15) width(1) start(-15) fraction
graph save Graph "Output\histo_mdleist.gph", replace
graph export "Output\histo_mdleist.emf", replace

* Job-value indices.
histogram bread_index, `panels' `discrete'
graph save Graph "Output\histo_breadcare.gph", replace
graph export "Output\histo_breadcare.emf", replace

histogram flexi_index, `panels' `discrete'
graph save Graph "Output\histo_flexi.gph", replace
graph export "Output\histo_flexi.emf", replace

	
***		descriptives table individual variables		***
*******************************************************
* Builds an 11 x 6 matrix of survey-weighted means by gender, exports it to
* Output\indtable.xlsx and saves it as Output\indtable.dta.
use "Data\data_choice_long.dta", clear
keep if choice==1

*** dissimilarity Index ***
* duncan2 is not defined in this file; presumably a user-written command that
* must be installed separately -- confirm before running on a new machine.
duncan2 id_fach frau [aw=w_t13_std]

mat indtable = J(11, 6, .)
/*	
	col 1 - mean women
	col 2 - se women
	col 3 - mean men
	col 4 - se men
	col 5 - diff women - men
	col 6 - p-value of the test "mean women = mean men" (column is named ttest)
	row 1 - R  (int_doer)
	row 2 - I  (int_thinker)
	row 3 - A  (int_creator)
	row 4 - S  (int_helper)
	row 5 - E  (int_persuader)
	row 6 - C  (int_organizer)
	row 7 - math pkte
	row 8 - deut pkte
	row 9 - diff
	row 10 - job value: flexi
	row 11 - job value: bread
*/
mat colnames indtable = mean_women se_women mean_men se_men diff ttest
mat rownames indtable = int_doer int_thinker int_creator int_helper int_persuader int_organizer math_pkte deut_pkte md_pkte_diff flexi bread

svyset ID_t[pw=w_t13_std]
local counter = 0	

* The variables MUST be listed in the same order as the row names above:
* rows are filled by position, not by name. (The list previously had
* int_helper before int_creator, so rows 3 and 4 carried each other's label.)
foreach var of varlist int_doer int_thinker int_creator int_helper int_persuader int_organizer ///
	math_punkte deut_punkte leist_md flexi_index bread_index {
	
	local ++counter
	svy: mean `var', over(frau)
		* r(table): row 1 = estimate, row 2 = standard error;
		* column 1 = frau==0 (men), column 2 = frau==1 (women).
		mat A`counter' = r(table)
		mat indtable[`counter', 1] = A`counter'[1,2]
		mat indtable[`counter', 2] = A`counter'[2,2]
		mat indtable[`counter', 3] = A`counter'[1,1]
		mat indtable[`counter', 4] = A`counter'[2,1]
		mat indtable[`counter', 5] = A`counter'[1,2] - A`counter'[1,1]
	* Test equality of the two gender means and keep the p-value.
	test [`var']1 = [`var']0
		mat indtable[`counter', 6] = r(p)
	}	
mat list indtable, format(%10.4f)
putexcel set "Output\indtable.xlsx", replace
putexcel B2=matrix(indtable, names) using "Output\indtable.xlsx", replace
*outtable using "Output\indtable", mat(indtable) format(%10.4f) replace center

* Replace the data in memory by the table (one variable per matrix column) and save it.
drop _all
svmat indtable, names(col)
save "Output\indtable.dta", replace


***************************
*** 	appendix		***
***************************

*** job values ***
* Weighted category shares of the two job-value items (t66210d, t66210h),
* separately for frau==1 (rows 1-2) and frau==0 (rows 3-4), exported to
* Output\indtable_jvalues.xlsx.
use "Data\data_choice_long.dta", clear
keep if choice==1
svyset ID_t[pw=w_t13_std]

matrix jvalues = J( 4, 6, .)
matrix colnames jvalues = 1_veryunimp 2_ 3_ 4_ 5_ 6_veryimp
matrix rownames jvalues = 1_flexi_fem 2_bread_fem 3_flexi_mal 4_bread_mal

* Women: rows 1 and 2. e(Prop) holds one proportion per response category;
* its first six rows are copied into the six columns of the current row.
local row = 0
quietly foreach item of varlist t66210d t66210h {
	local ++row
	svy: tab `item' if frau==1, cell
	mat A = e(Prop)
	forvalues cat = 1/6 {
		mat jvalues[`row', `cat'] = A[`cat', 1]
	}
}

* Men: rows 3 and 4, same procedure.
local row = 2
quietly foreach item of varlist t66210d t66210h {
	local ++row
	svy: tab `item' if frau==0, cell
	mat B = e(Prop)
	forvalues cat = 1/6 {
		mat jvalues[`row', `cat'] = B[`cat', 1]
	}
}

mat list jvalues, format(%10.3f)
putexcel set "Output\indtable_jvalues.xlsx", replace
putexcel B2=matrix(jvalues, names) using "Output\indtable_jvalues.xlsx", replace

***		RIASEC detailed		***
* Weighted category shares of the 18 t66207* items: rows 1-18 for frau==1,
* rows 19-36 for frau==0, five response categories in the columns.
* Exported to Output\indtable_riasec.xlsx.
matrix riasec = J(36, 5, .)
matrix colnames riasec = 1_verylittle 2_little 3_somewhat 4_quite 5_verymuch
matrix rownames riasec = frau_a g m b h n c i o d j p e k q f l r mann_a g m b h n c i o d j p e k q f l r

* Items in row order; the same list is used for both genders.
local riasec_items t66207a t66207g t66207m t66207b t66207h t66207n ///
	t66207c t66207i t66207o t66207d t66207j t66207p ///
	t66207e t66207k t66207q t66207f t66207l t66207r

* Women: rows 1-18.
local row = 0
quietly foreach item of varlist `riasec_items' {
	local ++row
	svy: tab `item' if frau==1, cell
	mat A = e(Prop)
	forvalues cat = 1/5 {
		mat riasec[`row', `cat'] = A[`cat', 1]
	}
}

* Men: rows 19-36.
local row = 18
quietly foreach item of varlist `riasec_items' {
	local ++row
	svy: tab `item' if frau==0, cell
	mat B = e(Prop)
	forvalues cat = 1/5 {
		mat riasec[`row', `cat'] = B[`cat', 1]
	}
}

mat list riasec, format(%10.3f)
putexcel set "Output\indtable_riasec.xlsx", replace
putexcel B2=matrix(riasec, names) using "Output\indtable_riasec.xlsx", replace
	
* correlation matrix
* Correlations between frau and the six interest scales, listed in the log
* before and after renaming, then exported to Output\cormat_riasec.xlsx.
correlate frau int_doer int_thinker int_helper int_creator int_persuader int_organizer
matrix CORMAT2 = r(C)
matrix list CORMAT2, format(%10.4f)

* Numbered names in the same order as the variables above, for rows and columns.
local cm2names 1_frau 2_int_doer 3_int_thinker 4_int_helper 5_int_creator 6_int_persuader 7_int_organizer
matrix colnames CORMAT2 = `cm2names'
matrix rownames CORMAT2 = `cm2names'
matrix list CORMAT2, format(%10.3f)

putexcel set "Output\cormat_riasec.xlsx", replace
putexcel B2=matrix(CORMAT2, names) using "Output\cormat_riasec.xlsx", replace

* cronbachs alpha
* One alpha per three-item scale. Each quoted group lists the item suffixes of
* one scale (t66207a t66207g t66207m, and so on). The items are collected, in
* the same order, for the factor analysis below.
local allitems
foreach trio in "a g m" "b h n" "c i o" "d j p" "e k q" "f l r" {
	local scaleitems
	foreach suffix of local trio {
		local scaleitems `scaleitems' t66207`suffix'
	}
	alpha `scaleitems'
	local allitems `allitems' `scaleitems'
}

* factor analysis
* Iterated principal factors on all 18 items, six factors retained,
* followed by a varimax rotation with the horst option.
factor `allitems', ipf factor(6)
rotate, varimax horst
	
* fraction in major by gender
* For each indicator fach1 ... fach23: survey-weighted mean and standard error
* among frau==1 and among frau==0. Exported to Output\table_A10.xlsx.
* Relies on the svyset declaration made earlier in the appendix.
local nmajors 23
matrix addtable = J(`nmajors', 5, .)
mat colnames addtable = row prop_females se_females prop_males se_males

* Restrict to each gender with subpop() rather than "if": with svy, an "if"
* qualifier drops the other observations before variance estimation, which
* Stata documents as yielding incorrect standard errors for subpopulations.
* The point estimates are the same either way.
forvalues i = 1/`nmajors' {
	svy, subpop(if frau == 1): mean fach`i'
		mat addtable[`i', 1]= `i'
		mat addtable[`i', 2]= _b[fach`i']
		mat addtable[`i', 3]= _se[fach`i']
	svy, subpop(if frau == 0): mean fach`i'
		mat addtable[`i', 4]= _b[fach`i']
		mat addtable[`i', 5]= _se[fach`i']
	}
	
mat list addtable, format(%10.3f)
putexcel set "Output\table_A10.xlsx", replace
putexcel B2=matrix(addtable, names) using "Output\table_A10.xlsx", replace
	
***	END OF DOFILE ***
* Close the log opened at the top of the file and leave Stata, discarding the data in memory.
log close
exit, clear
